#!/usr/bin/env Rscript
require(plyr)
require(rvest)

# Page whose table rows link to the individual station pages.
webpage_stations <-
  "https://www.meteonetwork.it/rete/stazioni-meteo.php"

# Collect the href of the link found in the first cell of every table row.
# NOTE(review): the XPath contains a `tbody` step; browsers insert that element
# into the DOM even when the raw HTML lacks it -- confirm the expression still
# matches what read_html() parses.
hrefs <-
  xml2::read_html(webpage_stations) %>%
  html_nodes(xpath = "/html/body/div[3]/div[2]/table/tbody/tr/td[1]/a") %>%
  html_attr("href")

# Reduce each link to its station code: strip the URL prefix (http or https,
# dots matched literally) and every remaining slash.
stations <-
  gsub("https?://my\\.meteonetwork\\.it/station/|/", "", hrefs)

# html_attr() yields NA for anchors without an href; drop those and empty
# codes so they are never requested as if they were stations.
stations <-
  stations[!is.na(stations) & nzchar(stations)]

#' Fetch the detail table of one station and return it in wide form.
#'
#' Downloads the station's "stazione" page, parses the first `<table>` on it
#' and transposes it: the values of the table's first column become the column
#' names, every remaining table column becomes one row.
#'
#' @param x Station code; substituted into the detail-page URL.
#' @return A data.frame with character columns named after the first column of
#'   the scraped table and default row names.
#' @details Signals an error when the page cannot be read, contains no
#'   `<table>`, or the table has no columns.
getStationDetails <- function(x) {
  dettagli_url <-
    "http://my.meteonetwork.it/station/%s/stazione"

  page <- xml2::read_html(sprintf(dettagli_url, x))
  table_node <- rvest::html_node(page, "table")

  # html_node() returns a missing node when nothing matches; fail with a clear
  # message instead of an opaque dispatch error further down.
  if (inherits(table_node, "xml_missing")) {
    stop("No <table> found on the detail page of station ", x, call. = FALSE)
  }

  # Transposing turns the parsed table into a matrix whose first row holds the
  # values of the table's first column.
  cells <- t(rvest::html_table(table_node))
  if (nrow(cells) < 1L) {
    stop("Empty detail table for station ", x, call. = FALSE)
  }

  # Take the header straight from the matrix: assigning a data-frame row as
  # names relies on implicit list coercion and yields factor codes on R < 4.0.
  header <- as.character(cells[1, ])

  table.df <- as.data.frame(
    cells[-1, , drop = FALSE],
    stringsAsFactors = FALSE
  )
  colnames(table.df) <- header
  rownames(table.df) <- NULL

  table.df
}

# Scrape every station, one request at a time, tagging each result with the
# station code. Results are collected in a list and combined once at the end
# instead of re-binding the growing data frame on every iteration.
station_results <- lapply(stations, function(station) {
  print(station)

  details <- tryCatch(
    dplyr::mutate(getStationDetails(station), code = station),
    error = function(e) {
      # One unreachable or malformed page must not discard everything that
      # has already been scraped: report it and move on.
      warning(
        "Skipping station ", station, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )

  # Random 1-5 second pause between requests.
  Sys.sleep(sample(1:5, 1))

  details
})

# Drop the stations that failed, then bind the rest, filling columns that are
# missing from some stations with NA.
station_results <- Filter(Negate(is.null), station_results)
station_details <- plyr::rbind.fill(station_results)

# rbind.fill() returns NULL when there is nothing to bind; keep the empty
# data frame that the script has always saved in that case.
if (is.null(station_details)) {
  station_details <- data.frame()
}

save(station_details, file = "station_details.RData")
